# topic 17
# This will be remarkably similar to topics 15 and 16, so
# much so that it would be worth comparing the three scripts.

# First, set up the situation.  We have a population
# with an unknown proportion of some characteristic.
#
# gnrnd5() and gnrnd4() come from the sourced course files.  This script
# relies on each call leaving its generated values in the global
# variable L1, which is why L1 is copied immediately after the call.
source("../gnrnd5.R")
source("../gnrnd4.R")
gnrnd5(38579399907, 567685)
# population size is 4000 items
big_pop <- L1
# I do not know the proportion of 1's in big_pop

# Now, someone says that they believe that the
# proportion of 1's in big_pop is 20%.
# Thus our null hypothesis is that the proportion
# of 1's in big_pop is 0.20.  We want to look at an
# alternative that says the proportion of 1's is
# greater than 0.20.  This is a one-tailed test.
# We will get a sample of size 80 and we will
# look at the proportion of 1's in the sample.
# We are willing to be wrong in telling the
# person that they are wrong 5% of the time!
# That is, even if the true proportion of 1's in
# big_pop is 20%, we are willing to reject the null
# hypothesis for 5% of the random samples that we take.
#
########################
## The critical value approach.  We know that
## we can use a normal approximation to the distribution
## of sample proportions because both 0.20*80 = 16 and
## (1-0.20)*80 = 64 are greater than 10.  Also,
## our sample size (80 of 4000, i.e. 2%) is way less
## than 5% of the population.
## Assuming that the null hypothesis is true, the
## distribution of sample proportions, p_hat, will
## be N( 0.20, sqrt(0.20*(1-0.20)/80))
## This is a one-tailed test because only
## a sample proportion that is too high
## would indicate that 0.20 is not the proportion.
## Therefore, find the value, in a normal distribution
## that has 0.05 as the P(X > x).
# get the standard deviation of the sample proportion under the
# null hypothesis (p = 0.20, n = 80)
p_sd <- sqrt(0.20 * (1 - 0.20) / 80)
p_sd
# long way: find the z-score with 5% of the standard normal above it,
# then convert that z-score to the scale of the sample proportion
high_z <- qnorm(0.05, lower.tail = FALSE)
high_z
high_val <- 0.20 + high_z * p_sd
high_val
####### pause and look at a shorter way to get
####### that critical value in one step
high_val <- qnorm(0.05, mean = 0.20, sd = p_sd, lower.tail = FALSE)
high_val
##########
##########
# Get our sample
# the first time we do this let us get the
# same sample each time
# (gnrnd4 is seeded by its key values; the script relies on it
#  leaving the generated values in the global L1)
gnrnd4(962137901, 400000001)
L1
# take those as the index values of our random sample
our_samp <- big_pop[L1]
our_samp
# find the number of 1's in our sample
table(our_samp)
# what proportion of the sample is that?
# Compute it from the sample rather than retyping the count by hand;
# for the seeded sample above this is 21/80 = 0.2625.
p_hat <- mean(our_samp == 1)
p_hat
## so now compare the proportion in the sample
## to our critical value.
high_val
## In this case the sample proportion is not greater
## than our critical high.  Therefore, at the
## 0.05 level of significance, we do not have evidence
## to reject the null hypothesis that the true
## proportion is 0.20
## in favor of the alternative hypothesis that
## the true proportion is greater than 0.20.
#############
############# the attained significance approach
## how strange would it be to get a proportion of
## 0.2625 (or higher) for a sample of size 80 if the true
## proportion is 0.200?
#
pnorm(p_hat, mean = 0.20, sd = p_sd, lower.tail = FALSE)
#
# That probability is not less than our 5% level
# of significance.  Therefore, do not reject the
# null hypothesis in favor of the alternative.
####################### Now use the function to
####################### do the same thing
# hypoth_test_prop() is defined in the sourced file.  Judging from the
# calls in this script its arguments appear to be, in order: the null
# proportion, the number of 1's in the sample, the sample size, a
# code for the alternative (1 here -- presumably "greater than";
# confirm in hypo_prop.R), and the level of significance.
source("../hypo_prop.R")
hypoth_test_prop(0.20, 21, 80, 1, 0.05)
#######################################
#######################################
# Now we want to repeat this process
# but each time we want a different sample
# of size 80
L1 <- sample(big_pop, 80)
freqs <- table(L1)
freqs
# Count the 1's explicitly.  freqs[1] would instead give the count of
# whichever value happens to sort first in the table.
num_x <- sum(L1 == 1)
hypoth_test_prop(0.20, num_x, 80, 1, 0.05)
#### re-run the five statements above (from sample() through
#### hypoth_test_prop()) again and again

### now, since we have the population let us peek
#   at the true proportion
freqs <- table(big_pop)
freqs
# proportion of 1's, using the actual population size rather than
# a hard-coded 4000
true_prop <- sum(big_pop == 1) / length(big_pop)
true_prop
####### Try our samples again, but this time test
## the null hypothesis that the true proportion
## is 0.24875, and do the test at the 0.05
## level of significance.
L1 <- sample(big_pop, 80)
freqs <- table(L1)
num_x <- sum(L1 == 1)
hypoth_test_prop(0.24875, num_x, 80, 1, 0.05)
#### re-run the four statements above again and again,
#### and we should see a Type I error about
#### 5% of the time.

### we can actually do this 1000 times and see how
### many times we reject the null hypothesis even
### though it is true.
n_reps <- 1000
# Pre-fill with NA (not 1:1000) so an unfilled slot can never be
# mistaken for a result.
L2 <- rep(NA, n_reps)
for (i in seq_len(n_reps)) {
  L1 <- sample(big_pop, 80)
  freqs <- table(L1)
  num_x <- sum(L1 == 1)
  answer <- hypoth_test_prop(0.24875, num_x, 80, 1, 0.05)
  # element 13 of the returned value is what gets tallied below --
  # presumably the reject / do-not-reject decision; confirm against
  # hypo_prop.R
  L2[i] <- answer[13]
}
table(L2)